<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2016-2017 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>How CDAP Pipelines Work</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />

    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '',
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="CDAP Pipelines" href="index.html" />
    <link rel="next" title="Cloud Runtimes" href="../cloud-runtimes/index.html" />
    <link rel="prev" title="Packaging Plugins" href="developing-plugins/packaging-plugins.html" />
    <!-- block extrahead -->
    <meta charset='utf-8'>
    <meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1'>
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: developer-manual -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a>
      </li>
      <li class="toctree-l1"><b><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting-started/index.html"> 入门指南</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/sandbox/index.html">CDAP Sandbox</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/zip.html">二进制 Zip 文件</a></li>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/zip.html#cdap-sandbox">启动和停止 CDAP Sandbox</a></li>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/virtual-machine.html">虚拟机镜像</a></li>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/docker.html">Docker 镜像</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/quick-start.html">快速入门</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/dev-env.html">搭建开发环境</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/start-stop-cdap.html">启动和停止 CDAP</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/building-apps.html">构建并运行应用</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../overview/index.html"> 概述</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../overview/anatomy.html"> 大数据应用剖析</a></li>
<li class="toctree-l2"><a class="reference internal" href="../overview/modes.html"> 模式和组件</a></li>
<li class="toctree-l2"><a class="reference internal" href="../overview/abstractions.html"> 核心概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../overview/interfaces.html"> 编程接口</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../building-blocks/index.html"> 抽象概念</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/core.html"> Core Abstractions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/applications.html"> Applications</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/datasets/index.html"> Datasets</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/overview.html"> Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/table.html"> Table API</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/fileset.html"> FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/partitioned-fileset.html"> Partitioned FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/time-partitioned-fileset.html"> TimePartitioned FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/system-custom.html"> System and Custom Datasets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/permissions.html"> Dataset Permissions</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/cube.html"> Cube Dataset</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/mapreduce-programs.html"> MapReduce Programs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/plugins.html"> Plugins</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/schedules.html"> Schedules</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/secure-keys.html"> Secure Keys</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/services.html"> Services</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/spark-programs.html"> Spark Programs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/workers.html"> Workers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/workflows.html"> Workflows</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/artifacts.html"> Artifacts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/program-lifecycle.html"> Program Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/namespaces.html"> Namespaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/transaction-system.html"> Transaction System</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/transactional-messaging-system.html"> Transactional Messaging System</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../metadata/index.html"> 元数据</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../metadata/system-metadata.html"> System Metadata</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/discovery-lineage.html"> Discovery and Lineage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/field-lineage.html"> Field Level Lineage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/audit-logging.html"> Audit Logging</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/metadata-ui.html"> CDAP Metadata UI</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/programmatic-metadata.html"> Accessing metadata programmatically</a></li>
</ul>
</li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 数据流管道</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="concepts-design.html"> Concepts and Design</a></li>
<li class="toctree-l2"><a class="reference internal" href="getting-started.html"> Getting Started</a></li>
<li class="toctree-l2"><a class="reference internal" href="studio.html"> CDAP Studio</a></li>
<li class="toctree-l2"><a class="reference internal" href="creating-pipelines.html"> Creating Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="running-pipelines.html"> Running Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="plugin-management.html"> Plugin Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="plugins/index.html"> Plugin Reference</a><ul>
<li class="toctree-l3"><a class="reference internal" href="plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="developing-pipelines.html"> Developing Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="developing-plugins/index.html"> Developing Plugins</a><ul>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/plugin-basics.html">Plugin Basics</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/creating-a-plugin.html">Creating a Plugin</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/presentation-plugins.html">Plugin Presentation</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/testing-plugins.html">Testing Plugins</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/packaging-plugins.html">Packaging Plugins</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="current reference internal" href="#"> How CDAP Pipelines Work</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../cloud-runtimes/index.html"> 云平台运行</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/concepts/index.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/provisioners/index.html"> Provisioners</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/provisioners/gcp-dataproc.html">Google Dataproc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/provisioners/aws-emr.html">Amazon Elastic MapReduce</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/provisioners/remote-hadoop.html">Remote Hadoop</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/profiles/index.html"> Profiles</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/profiles/creating-profiles.html">Creating Profiles</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/profiles/assigning-profiles.html">Assigning Profiles</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/profiles/admin-controls.html">Admin Controls</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/example/index.html"> Example</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../security/index.html"> 安全</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../security/client-authentication.html">Client Authentication</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/cdap-authentication-clients-java.html">CDAP Authentication Client for Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/cdap-authentication-clients-python.html">CDAP Authentication Client for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/custom-authentication.html">Custom Authentication</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/authorization-extensions.html">Authorization Extensions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../testing/index.html"> 测试和调试</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../testing/testing.html"> Testing a CDAP Application</a></li>
<li class="toctree-l2"><a class="reference internal" href="../testing/debugging.html"> Debugging</a></li>
<li class="toctree-l2"><a class="reference internal" href="../testing/troubleshooting.html"> Troubleshooting</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../ingesting-tools/index.html"> 数据融合</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-stream-clients-java.html">CDAP Stream Client for Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-stream-clients-python.html">CDAP Stream Client for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-stream-clients-ruby.html">CDAP Stream Client for Ruby</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-flume.html">CDAP Flume</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-exploration/index.html"> 数据探索</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/filesets.html"> Fileset Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/tables.html"> Table Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/object-mapped-tables.html"> ObjectMappedTable Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/custom-datasets.html"> Custom Dataset Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/hive-execution-engines.html"> Hive Execution Engines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/index.html"> 高级主题</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../advanced/application-logback.html"> Application Logback</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/best-practices.html"> Best Practices</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/class-loading.html"> Class Loading</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/configuring-resources.html"> Configuring Program Resources</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/program-retry-policies.html"> Program Retry Policies</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="how-cdap-pipelines-work">
<span id="cdap-pipelines-how-pipelines-work"></span><h1>How CDAP Pipelines Work<a class="headerlink" href="#how-cdap-pipelines-work" title="Permalink to this headline">🔗</a></h1>
<p><strong>A “behind-the-scenes” look at CDAP Pipelines</strong></p>
<p>CDAP Pipelines is a capability of CDAP and combines a user interface with back-end services
to enable the building, deploying, and managing of data pipelines. It has no dependencies
outside of CDAP, and all pipelines run within a Hadoop cluster.</p>
<div class="section" id="architecture">
<h2>Architecture<a class="headerlink" href="#architecture" title="Permalink to this headline">🔗</a></h2>
<p>CDAP pipelines allow users to build complex data pipelines, either simple ETL
(extract-transform-load) or more complicated data pipelines on Hadoop.</p>
<p>Data pipelines—unlike the linear ETL pipelines—are often not linear in nature
and require the performing of more complex transformations including forks and joins at
the record and feed level. They can be configured to perform various functions at
different times, including machine-learning algorithms and custom processing.</p>
<p>Pipelines need to support the creation of complex processing workloads that are
repeatable, highly available, and easily maintainable.</p>
</div>
<div class="section" id="logical-versus-physical-pipelines">
<h2>Logical versus Physical Pipelines<a class="headerlink" href="#logical-versus-physical-pipelines" title="Permalink to this headline">🔗</a></h2>
<div class="figure align-right" id="id1" style="width: 50%">
<a class="bordered-image-top-margin reference internal image-reference" href="../_images/logical-physical-pipelines.png"><img alt="Diagram of a logical pipeline converted by a planner into a physical pipeline" class="bordered-image-top-margin" src="../_images/logical-physical-pipelines.png" style="width: 3in;" /></a>
<p class="caption"><span class="caption-text"><strong>Logical</strong> and <strong>Physical</strong> Pipelines, converted by a <strong>Planner</strong></span></p>
</div>
<p id="cdap-pipelines-how-pipelines-work-logical-start">Within CDAP, there is the concept of <em>logical</em> and <em>physical</em> pipelines, converted by a
planner, and then run in an execution environment.</p>
<p>A <strong>logical pipeline</strong> is the view of the pipeline as seen in the CDAP Studio and the
CDAP UI. It is the view composed of sources, sinks, and other plugins, and does
not show the underlying technology used to actually manifest and run the pipeline.</p>
<p>This view of a pipeline focuses on the functional requirements of the pipeline, rather
than the physical runtime. It’s closer to the inherent nature of processing as viewed by a
user. This view isolates it from the volatile physical pipeline, which can be operated in
different runtime environments.</p>
<p>A <strong>physical pipeline</strong> is the manifestation of a logical pipeline as a CDAP application,
which is a collection of programs and services that read and write through the data
abstraction layer in CDAP. Physical view elements are those elements that actually run
during the execution of a data pipeline on a Hadoop cluster. They execute the MapReduce
Programs, Spark, Spark Streaming, Tigon, Workflows, and so on. The physical pipeline view
is based on the particular underlying technologies used and, as such, can be changed
dynamically.</p>
<p>A <strong>planner</strong> is responsible for converting the logical pipeline to a physical pipeline. The
planner analyzes the logical view of the pipeline and converts it to a physical execution
plan, performing optimizations, and bundling functions into one or more jobs.</p>
</div>
<div class="section" id="execution-environment">
<span id="cdap-pipelines-how-pipelines-work-logical-end"></span><h2>Execution Environment<a class="headerlink" href="#execution-environment" title="Permalink to this headline">🔗</a></h2>
<p>The <strong>execution environment</strong> is the actual runtime environment where all the components of
the data pipeline are executed on the Hadoop cluster by CDAP. MapReduce, Spark, Spark
Streaming, and Tigon are part of this environment that allows the execution of the data
pipeline. The planner maps the logical pipeline to a physical pipeline using the environment
runtimes available.</p>
</div>
<div class="section" id="functional-components">
<h2>Functional Components<a class="headerlink" href="#functional-components" title="Permalink to this headline">🔗</a></h2>
<p>These are the different functional components that are utilized within CDAP pipelines:</p>
<div class="figure align-center" id="id2" style="width: 100%">
<a class="bordered-image-top-margin reference internal image-reference" href="../_images/cdap-pipelines-architecture.png"><img alt="Diagram of the functional architecture of CDAP pipelines" class="bordered-image-top-margin" src="../_images/cdap-pipelines-architecture.png" style="width: 6in;" /></a>
<p class="caption"><span class="caption-text"><strong>Functional Architecture of CDAP Pipelines</strong></span></p>
</div>
<div class="section" id="application">
<h3>Application<a class="headerlink" href="#application" title="Permalink to this headline">🔗</a></h3>
<p>An <strong>application</strong> is a standardized container framework for defining all services. It is
responsible for managing the lifecycle of programs and datasets within an application.
Each CDAP pipeline is converted into a CDAP application, and deployed and managed
independently.</p>
</div>
<div class="section" id="application-template">
<h3>Application Template<a class="headerlink" href="#application-template" title="Permalink to this headline">🔗</a></h3>
<p>An <strong>application template</strong> is a user-defined, reusable, reconfigurable pattern of an
application. It is parameterized by a configuration that can be reconfigured upon
deployment. It provides a generic version of an application which can be repurposed,
instead of requiring the ongoing creation of specialized applications. The
re-configurability and modularization of the application is exposed through plugins.
CDAP provides its own, system-defined application templates, though new user-defined
ones can be added that can use the DAG interface of the CDAP Studio. The application
templates are configured using the CDAP Studio and deployed as applications into a Hadoop
cluster.</p>
<p>Application templates consist of a definition of their different components—processing,
workflow, and dataset—in the form of a configuration. Once a configuration is passed to
the template, a CDAP application is constructed by combining the necessary pieces to form
an executable pipeline. An application template consists of:</p>
<ul class="simple">
<li>A definition of the different types of processing supported by the template. These can include
MapReduce, Service, Spark, Spark Streaming, Tigon, Worker, and Workflow. In the case of a
CDAP Pipeline, it (currently) can include MapReduce, Spark, Tigon, Worker, and
Workflow.</li>
<li>A planner is optional; however, CDAP includes a planner that translates a logical
pipeline into a physical pipeline and pieces together all of the processing components
supported by the template.</li>
</ul>
</div>
<div class="section" id="plugin">
<h3>Plugin<a class="headerlink" href="#plugin" title="Permalink to this headline">🔗</a></h3>
<p>A <strong>plugin</strong> is a customizable module, exposed and used by an application template. It
simplifies adding new features or extending the capability of an application. Plugin
implementations are based on interfaces exposed by the application templates. Currently, CDAP
pipeline application templates expose Source, Transform, and Sink interfaces, which have
multiple implementations. Future Application Templates will expose more plugins such as
Compute, Arbitrary MR, and Spark in addition to those mentioned above.</p>
</div>
<div class="section" id="artifact">
<h3>Artifact<a class="headerlink" href="#artifact" title="Permalink to this headline">🔗</a></h3>
<p>An <strong>artifact</strong> is a versioned packaging format used to aggregate applications, datasets, and
plugins along with associated metadata. It is a JAR (Java Archive) containing Java classes
and resources.</p>
</div>
<div class="section" id="cdap-studio">
<h3>CDAP Studio<a class="headerlink" href="#cdap-studio" title="Permalink to this headline">🔗</a></h3>
<p><strong>CDAP Studio</strong> is a visual development environment for building data pipelines on
Hadoop. It has a click-and-drag interface for building and configuring data pipelines. It
also supports the ability to develop, run, automate, and operate pipelines from within
the CDAP UI. The pipeline interface integrates with the CDAP interface, allowing
drill-down debugging of pipelines and the building of metrics dashboards to closely
monitor pipelines through CDAP. The CDAP Studio integrates with other capabilities.</p>
</div>
<div class="section" id="testing-and-automation-framework">
<h3>Testing and Automation Framework<a class="headerlink" href="#testing-and-automation-framework" title="Permalink to this headline">🔗</a></h3>
<p>An end-to-end <strong>JUnit framework</strong> (written in Java) is available in CDAP that allows
developers to test their application templates and plugins during development. It is built
as a modular framework that allows for the testing of individual components. It runs
in-memory in CDAP, as the abstracting to in-memory structures makes for easier debugging
(shorter stack traces). The tests can be integrated with continuous integration (CI) tools
such as Bamboo, Jenkins, and TeamCity.</p>
</div>
</div>
<div class="section" id="implementation-of-cdap-pipelines">
<h2>Implementation of CDAP Pipelines<a class="headerlink" href="#implementation-of-cdap-pipelines" title="Permalink to this headline">🔗</a></h2>
<p>CDAP pipelines are built as a CDAP capability, with three major components:</p>
<ul class="simple">
<li><strong>CDAP Studio,</strong> the visual editor, running in a browser</li>
<li><strong>Application Templates,</strong> packaged as artifacts, either system- or user-defined</li>
<li><strong>Plugins,</strong> extensions to the application templates, in a variety of different types
and implementations</li>
</ul>
<p>The <strong>CDAP Studio</strong> interfaces with CDAP using RESTful APIs.</p>
<p>The <strong>application templates</strong>—ETL Batch, Data Pipeline Batch, and ETL Real-time—are available
by default from within the CDAP Studio. Additional application templates, such as Data Pipeline
Real-time and Spark Streaming, are being added in upcoming releases.</p>
<p>The ETL Batch and ETL Real-time application templates expose three plugin types: source,
transform, and sink. The Data Pipeline Batch application template exposes three additional
plugin types: aggregate, compute, and model. Additional plugin types can be created and
will be added in upcoming releases.</p>
<p>There are many <strong>different plugins</strong> that implement each of these types available
“out-of-the-box” in CDAP. New plugins can be implemented using the
public APIs exposed by the application templates. When an application template or a plugin
is deployed within CDAP, it is referred to as an <strong>artifact</strong>. CDAP provides capabilities to
manage the different versions of both the application templates and the plugins.</p>
<div class="figure align-center" id="id3" style="width: 100%">
<a class="bordered-image-top-margin reference internal image-reference" href="../_images/cdap-pipelines-internals.png"><img alt="Internals of CDAP Pipelines" class="bordered-image-top-margin" src="../_images/cdap-pipelines-internals.png" style="width: 6in;" /></a>
<p class="caption"><span class="caption-text"><strong>Internals of CDAP Pipelines</strong></span></p>
</div>
</div>
<div class="section" id="building-of-a-pipeline">
<h2>Building of a Pipeline<a class="headerlink" href="#building-of-a-pipeline" title="Permalink to this headline">🔗</a></h2>
<p>Here is how the CDAP Studio works with CDAP to build a pipeline, beginning
with a user creating a new pipeline in the CDAP Studio. First, the components
of the CDAP Studio:</p>
<div class="figure align-center" id="id4" style="width: 100%">
<a class="bordered-image-top-margin reference internal image-reference" href="../_images/cdap-studio-annotated.png"><img alt="CDAP Studio, showing different UI components" class="bordered-image-top-margin" src="../_images/cdap-studio-annotated.png" style="width: 6in;" /></a>
<p class="caption"><span class="caption-text"><strong>CDAP Studio, showing different UI components</strong></span></p>
</div>
<ul>
<li><p class="first"><strong>User Selects an Application Template</strong></p>
<p>A user building a pipeline within the CDAP Studio will select a pipeline type, which is
essentially picking an application template. They will pick one of ETL Batch, ETL
Real-time, or Data Pipeline. Other application templates such as Spark Streaming will be
available in the future.</p>
</li>
<li><p class="first"><strong>Retrieve the Plugin types supported by the selected Application Template</strong></p>
<p>Once a user has selected an application template, the Studio makes a request to
CDAP for the different plugin types supported by the application template. In the case
of the ETL Batch pipeline, CDAP will return Source, Transform, and Sink as plugin types.
This allows the Studio to construct the selection drawer in the left sidebar of
the UI.</p>
</li>
<li><p class="first"><strong>Retrieve the Plugin definitions for each Plugin type</strong></p>
<p>CDAP Studio then makes a request to CDAP for each plugin type, requesting all plugin
implementations available for each plugin type.</p>
</li>
<li><p class="first"><strong>User Builds the CDAP Pipeline</strong></p>
<p>The user then uses the Studio’s canvas to create a pipeline with the available
plugins.</p>
</li>
<li><p class="first"><strong>Validation of the CDAP Pipeline</strong></p>
<p>The user can request at any point that the pipeline be validated. This request is
translated into a RESTful API call to CDAP, which is then passed to the application
template, which checks whether the pipeline is valid.</p>
</li>
<li><p class="first"><strong>Application Template Configuration Generation</strong></p>
<p>As the user is building a pipeline, the Studio is building a JSON configuration
that, when completed, will be passed to the application template to configure and create
an application that is deployed to the cluster.</p>
</li>
<li><p class="first"><strong>Converting a logical Pipeline into a physical Pipeline and registering the Application</strong></p>
<p>When the user publishes the pipeline, the configuration generated by the Studio
is passed to the application template as part of the creation of the Application. The
application template takes the configuration, passes it through a planner to create a
physical layout, appropriately generates an application specification, and registers the
specification with CDAP as an application.</p>
</li>
<li><p class="first"><strong>Managing the physical Pipeline</strong></p>
<p>Once the application is registered with CDAP, the pipeline is ready to be started. If it
was scheduled, the schedule is ready to be enabled. The CDAP UI then uses the CDAP
RESTful APIs to manage the pipeline’s lifecycle. The pipeline can be managed from
CDAP through the CDAP UI, by using the CDAP CLI, or by using the RESTful APIs.</p>
</li>
<li><p class="first"><strong>Monitoring the physical Pipeline</strong></p>
<p>As CDAP pipelines are run as CDAP applications, their logs and metrics are
aggregated by the CDAP system and available using RESTful APIs.</p>
</li>
</ul>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Packaging Plugins" href="developing-plugins/packaging-plugins.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Cloud Runtimes" href="../cloud-runtimes/index.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>